1 Objective

The goal of this session is to get a clean macula densa dataset to work with.

2 Load Packages

options(future.globals.maxSize = 74 * 1024^3) # 55 GB
getOption("future.globals.maxSize") #59055800320
## [1] 79456894976

3 Load Dataset

SO1 <- LoadSeuratRds(here("outputs", "so_merge_raw.rds"))


head(SO1@meta.data)

4 How many of these are actually MD cells

SO.markers <- FindAllMarkers(SO1, only.pos = TRUE)
SO.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1)
SO.markers %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1) %>%
    slice_head(n = 5) %>%
    ungroup() -> top10
DoHeatmap(SO1, features = top10$gene) + NoLegend()

markers.to.plot1 <- c("Lrp2",         # PT
                      "Slc5a12",      # PT-S1
                      "Slc13a3",      # PT-S2
                      "Slc16a9",      # PT-S3
                      "Havcr1",       # Injured PT
                      "Epha7",        # dTL
                      "Cryab",        # dTL
                      "Cdh13",        # dTL1
                      "Slc14a2",      # dTL2
                      "Slc12a1",      # TAL
                      "Umod",         # TAL, DCT1
                      "Egf",          # TAL, DCT1,
                      "Cldn10",       # TAL
                      "Cldn16",       # TAL
                      "Nos1",         # MD
                      "Slc12a3",      # DCT
                      "Pvalb",        # DCT1
                      "Slc8a1",       # DCT2, CNT
                      "Aqp2",         # PC
                      "Slc4a1",       # IC-A
                      "Slc26a4",      # IC-B
                      "Nphs1",        # Podo
                      "Ncam1",        # PEC
                      "Flt1",         # Endo
                      "Emcn",         # Glom Endo
                      "Kdr",          # Capillary Endo
                      "Pdgfrb",       # Perivascular
                      "Pdgfra",       # Fib
                      "Piezo2",       # Mesangial
                      "Acta2",        # Mural
                      "Ptprc",        # Immune
                      "Cd74",         # Macrophage
                      "Skap1",        # B/T Cells 
                      "Upk1b",        # Uro
                      "Top2a"         # Proliferation
)
                      
DotPlot(SO1,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()

DimPlot(SO1)

5 Subset out non MD cells.

SO2 <- subset(SO1, idents = c("4", "5", "6", "7"), invert = TRUE)

SO3 <- subset(SO2, features = grep("^mt-|^Gm|Rik|^Rp", rownames(SO2), invert = TRUE, value = TRUE))

DimPlot(SO2)

SO3 <- SCTransform(SO3) %>%
    RunPCA() %>%
    FindNeighbors(dims = 1:15) %>%
    FindClusters(resolution = 2) %>%
    RunUMAP(dims = 1:15)
## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14658 by 11529
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## There are 2 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 302 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14658 genes
## Computing corrected count matrix for 14658 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 15.87326 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Warning: The `slot` argument of `SetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Different cells and/or features from existing assay SCT
## Set default assay to SCT
## PC_ 1 
## Positive:  Umod, Egf, Tmem52b, Sult1d1, Sostdc1, Fabp3, Prdx5, Foxq1, Klk1, Cox6c 
##     Wfdc15b, Krt7, Ly6a, Ckb, Slc25a5, Cox5b, Wfdc2, Atp5g1, Cldn19, Ggt1 
##     Ndufa4, Cox4i1, Atp1a1, Chchd10, Atp5b, Cox8a, Gadd45g, Cox6b1, Cox7a2, Uqcr11 
## Negative:  Pappa2, Zfand5, Aard, Robo2, Pde10a, Wwc2, Itga4, Nadk2, S100g, Nos1 
##     Ramp3, Neat1, Sgms2, Ptgs2, Col4a4, Irx1, Col4a3, Tmem158, Mir6236, Ranbp3l 
##     Itprid2, Bmp3, Cdkn1c, Camk2d, Dctd, Mcub, Sdc4, Etnk1, Peg3, Zbtb20 
## PC_ 2 
## Positive:  Fos, Junb, Jun, Egr1, Btg2, Hspa1b, Hspa1a, Atf3, Zfp36, Ier2 
##     Fosb, Socs3, Klf6, Klf2, Jund, Dusp1, Dnajb1, Rhob, Gadd45g, Tsc22d1 
##     Gadd45b, Klf4, H3f3b, Ubc, Cebpd, Actb, H1f2, H2bc4, Ppp1r15a, Hspb1 
## Negative:  Pappa2, Egf, Slc12a1, CT010467.1, Sfrp1, Mir6236, Etnk1, Umod, Wnk1, Sec14l1 
##     Robo2, Hsp90b1, Pde10a, Car15, Atp1b1, Col4a4, Aard, Wwc2, Mal, Itprid2 
##     Mgst1, Aebp1, Col4a3, Bmp3, Trim2, Nos1, Sgms2, Zbtb20, Pth1r, Tfcp2l1 
## PC_ 3 
## Positive:  Fth1, Ubb, Ftl1, Ldhb, Fxyd2, Car15, Prdx1, Mt1, Eif1, Cd63 
##     Cd9, Mpc2, Mgst1, Clu, Bsg, Tmem213, Aldoa, Mdh1, Ramp3, S100a1 
##     Selenow, Spp1, Tmem176b, Tspo, H3f3b, Wfdc2, Atpif1, Lgals3, Tmem59, Tmbim6 
## Negative:  Egf, Mir6236, CT010467.1, Umod, Tmem52b, Neat1, Nme7, Malat1, Slc12a1, Kcnq1ot1 
##     Lars2, Etnk1, Wnk1, Dst, Sult1d1, Foxq1, Slc5a3, Atrx, Atp1b1, Pnisr 
##     Fos, Syne2, Gls, Ivns1abp, Hnrnpa2b1, Srrm2, Chka, Col4a4, Paxbp1, Zbtb20 
## PC_ 4 
## Positive:  Mt1, Apoe, Mt2, Aebp1, Sostdc1, Gpx6, Fxyd2, Neat1, Ptger3, Ckb 
##     Fgf9, Egfl6, Ivns1abp, Defb1, Mfsd4a, Car4, Fkbp11, Chchd10, Plet1, Hspa1b 
##     Atp5md, Igfbp5, Cox6c, Tmem213, Atp5k, CT010467.1, Chka, Mgst3, Lpl, Atp1a1 
## Negative:  Pappa2, Aard, Tmem52b, Umod, Sult1d1, Tmem158, Egf, Foxq1, Mcub, Ptgs2 
##     Dctd, Wwc2, Iyd, Ramp3, S100g, Ptprz1, Cd9, Car15, Hsp90b1, Tmsb4x 
##     Defb42, Cdkn1c, Wnk1, Pth1r, Tagln2, Lcn2, Il1f6, Clu, Ctsc, Bmp2 
## PC_ 5 
## Positive:  Hspa1b, Hspa1a, Umod, Egf, Jun, Cldn10, Pappa2, Car15, Klk1, Fos 
##     Sfrp1, Fth1, Ier3, Clcnkb, Atp1a1, Slc12a1, Cited2, Hspa8, Pik3r1, Cdkn1c 
##     Robo2, Kng2, Aard, Gsta4, Ptger3, Fau, Irx1, Ftl1, Uba52-ps, Tmem37 
## Negative:  S100g, Actb, Tmem52b, Igfbp7, Cryab, S100a6, Tacstd2, Tmsb10, Tmsb4x, Atf3 
##     Lcn2, Sult1d1, Foxq1, Crip1, Egr1, Ppia, Atp5md, Lgals1, Mir6236, Gadd45b 
##     Klf4, Ifitm3, Ccnd1, Ccn1, Hbegf, Serf2, Spp1, Nupr1, Cdkn1a, Krt7
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11529
## Number of edges: 359040
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7101
## Number of communities: 25
## Elapsed time: 1 seconds
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 02:13:48 UMAP embedding parameters a = 0.9922 b = 1.112
## 02:13:48 Read 11529 rows and found 15 numeric columns
## 02:13:48 Using Annoy for neighbor search, n_neighbors = 30
## 02:13:48 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 02:13:49 Writing NN index file to temp file /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp1vVbwR/filef66b3347ff1b
## 02:13:49 Searching Annoy index using 1 thread, search_k = 3000
## 02:13:51 Annoy recall = 100%
## 02:13:51 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 02:13:52 Initializing from normalized Laplacian + noise (using RSpectra)
## 02:13:52 Commencing optimization for 200 epochs, with 484894 positive edges
## 02:13:52 Using rng type: pcg
## 02:13:54 Optimization finished
DimPlot(SO3)

DimPlot(SO3,split.by = "treatment")

DotPlot(SO3,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()
## Warning: Could not find Slc5a12 in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Kdr in the default search locations, found in 'RNA'
## assay instead
## Warning: The following requested variables were not found: Slc13a3, Havcr1,
## Pdgfrb, Pdgfra, Piezo2, Upk1b

head(SO3)
library(plotly)

dim_plot <- DimPlot(SO3, reduction = "umap",split.by = "treatment") +
  ggtitle("UMAP Dimensional Reduction")
interactive_plot <- ggplotly(dim_plot)

# Display the interactive plot
interactive_plot

6 Simplifying Dataset with similarities

SO4 <- subset(SO3, idents = c("23"), invert = TRUE)

DimPlot(SO4)

SO4 <- SCTransform(SO4) %>%
    RunPCA() %>%
    FindNeighbors(dims = 1:16) %>%
    FindClusters(resolution = 0.7) %>%
    RunUMAP(dims = 1:16)
## Running SCTransform on assay: RNA
## vst.flavor='v2' set. Using model with fixed slope and excluding poisson genes.
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 14577 by 11448
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 5000 cells
## There are 2 estimated thetas smaller than 1e-07 - will be set to 1e-07
## Found 292 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 14577 genes
## Computing corrected count matrix for 14577 genes
## Calculating gene attributes
## Wall clock passed: Time difference of 15.51806 secs
## Determine variable features
## Centering data matrix
## Place corrected count matrix in counts slot
## Warning: Different cells and/or features from existing assay SCT
## Set default assay to SCT
## PC_ 1 
## Positive:  Umod, Egf, Tmem52b, Sult1d1, Sostdc1, Fabp3, Klk1, Prdx5, Cox6c, Foxq1 
##     Ly6a, Wfdc15b, Wfdc2, Slc25a5, Krt7, Cox5b, Ckb, Atp5g1, Cldn19, Cox4i1 
##     Ndufa4, Atp1a1, Ggt1, Cox8a, Atp5b, Chchd10, Cox6b1, Cox7a2, Uqcrq, Uqcr11 
## Negative:  Pappa2, Zfand5, Aard, Robo2, Wwc2, Pde10a, Mir6236, Itga4, Nadk2, Neat1 
##     Nos1, S100g, Col4a4, Ramp3, Sgms2, Col4a3, Ptgs2, Irx1, Tmem158, Itprid2 
##     Ranbp3l, Bmp3, Camk2d, Sdc4, Etnk1, Cdkn1c, Zbtb20, Srrm2, Hnrnpa2b1, Peg3 
## PC_ 2 
## Positive:  Fos, Junb, Jun, Hspa1b, Hspa1a, Btg2, Egr1, Zfp36, Atf3, Ier2 
##     Fosb, Socs3, Klf6, Klf2, Jund, Dnajb1, Dusp1, Tsc22d1, Rhob, Gadd45g 
##     H3f3b, Gadd45b, Ubc, Cebpd, Mt1, Mt2, Actb, H2bc4, H1f2, Klf4 
## Negative:  Mir6236, CT010467.1, Egf, Pappa2, Slc12a1, Umod, Etnk1, Wnk1, Nme7, Atp1b1 
##     Sfrp1, Robo2, Sptbn1, Col4a4, Sec14l1, Hsp90b1, Zbtb20, Dst, Itprid2, Tmem52b 
##     Lars2, Pde10a, Kcnq1ot1, App, Mal, Atrx, Col4a3, Tfcp2l1, Syne2, Nfe2l1 
## PC_ 3 
## Positive:  Egf, Mir6236, CT010467.1, Umod, Tmem52b, Neat1, Nme7, Fos, Jun, Malat1 
##     Slc12a1, Junb, Kcnq1ot1, Lars2, Egr1, Sult1d1, Foxq1, Etnk1, Dst, Wnk1 
##     Slc5a3, Atp1b1, Atf3, Fosb, Zfp36, Btg2, Atrx, Klf6, Pnisr, Ivns1abp 
## Negative:  Fth1, Ubb, Ftl1, Fxyd2, Ldhb, Car15, Prdx1, Cd63, Cd9, Eif1 
##     Mpc2, Mgst1, Mt1, Tmem213, Bsg, Clu, Mdh1, Aldoa, Itm2b, Ramp3 
##     Selenow, Tmem59, S100a1, Spp1, Tmem176b, Tspo, Tmbim6, Tle5, Atpif1, Wfdc2 
## PC_ 4 
## Positive:  Pappa2, Aard, Tmem52b, Umod, Egf, Sult1d1, Tmem158, Foxq1, Mcub, Ptgs2 
##     Dctd, Wwc2, Iyd, Car15, Ramp3, Ptprz1, Cd9, Hsp90b1, S100g, Wnk1 
##     Pth1r, Cdkn1c, Defb42, Clu, Ctsc, Tmsb4x, Tagln2, Bmp2, Col4a3, Tsc22d1 
## Negative:  Mt1, Apoe, Mt2, Aebp1, Sostdc1, Gpx6, Fxyd2, Ptger3, Neat1, Ckb 
##     Fgf9, Egfl6, Ivns1abp, Mfsd4a, Defb1, Car4, CT010467.1, Plet1, Fkbp11, Atp5md 
##     Chchd10, Cox6c, Atp5k, Igfbp5, Tmem213, Mgst3, Chka, Atp5mpl, Avpr1a, Lpl 
## PC_ 5 
## Positive:  CT010467.1, Hspa1b, Hspa1a, Klk1, Pappa2, Car15, Cldn10, Mir6236, Fth1, Lars2 
##     Jun, Hspa8, Wfdc2, Itm2b, Ftl1, Fau, Uba52-ps, Ly6a, Sfrp1, Aard 
##     Pik3r1, Mal, Eef1a1, Atp1a1, Ptger3, Atp1b1, Tspo, Hspb1, Tmem176a, Gpx4 
## Negative:  S100g, Actb, Tmem52b, Sdc4, Abhd2, Egr1, Uroc1, Tmsb10, Ppia, Atf3 
##     Serf2, Slc39a1, Alkbh5, Ndufb1-ps, Cebpd, Igfbp7, Cox6c, Atp5md, Ramp3, Ndufa3 
##     Wfdc15b, Kdm6b, Atp5k, Atp5e, Rbm47, Ldhb-ps, Ranbp3l, Ppm1h, Atp5mpl, Rcan1
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11448
## Number of edges: 356414
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7941
## Number of communities: 15
## Elapsed time: 0 seconds
## 02:14:23 UMAP embedding parameters a = 0.9922 b = 1.112
## 02:14:23 Read 11448 rows and found 16 numeric columns
## 02:14:23 Using Annoy for neighbor search, n_neighbors = 30
## 02:14:23 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 02:14:23 Writing NN index file to temp file /var/folders/7n/x74qctp91rng390gx0z9hmd80000gn/T//Rtmp1vVbwR/filef66b5958c9ec
## 02:14:23 Searching Annoy index using 1 thread, search_k = 3000
## 02:14:25 Annoy recall = 100%
## 02:14:25 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 02:14:26 Initializing from normalized Laplacian + noise (using RSpectra)
## 02:14:26 Commencing optimization for 200 epochs, with 483954 positive edges
## 02:14:26 Using rng type: pcg
## 02:14:28 Optimization finished
DimPlot(SO4)

DimPlot(SO4,split.by = "treatment")

DotPlot(SO4,
features = markers.to.plot1,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
col.max = 2.5)+
coord_flip()
## Warning: Could not find Slc5a12 in the default search locations, found in 'RNA'
## assay instead
## Warning: Could not find Kdr in the default search locations, found in 'RNA'
## assay instead
## Warning: The following requested variables were not found: Slc13a3, Havcr1,
## Pdgfrb, Pdgfra, Piezo2, Upk1b

SO4@meta.data$seurat_clusters <- factor(
  SO4@meta.data$seurat_clusters,
  levels = c("0", "1", "3", "4", "10", "2", "11", "5", "13", "6", "9", "14", "7", "8", "12")
)


Idents(SO4) <- SO4$seurat_clusters


## Simplifying into a few types

markers.to.plot2 <- c("Pappa2",
                      "Nos1",
                      "Egf",
                      "Umod",
                      "Cldn19",
                      "Foxq1",
                      "Sult1d1",
                      "Fos",
                      "Junb",
                      "Cxcl10",
                      "Isg15"
)

DimPlot(SO4)

DotPlot(SO4,
features = markers.to.plot2,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
group.by = "seurat_clusters",
col.max = 2.5)+
coord_flip()

7 Assigning Subclasses for clusters

SO4@meta.data <- SO4@meta.data %>%
  mutate(subclass_MD = dplyr::case_when(
    seurat_clusters %in% c(0,1,3,4,10,2,11) ~ "type_1",
    seurat_clusters %in% c(5,13,6,9,14) ~ "type_2a",
    seurat_clusters %in% c(7) ~ "type_2b",
      seurat_clusters %in% c(8) ~ "type_3",
    seurat_clusters == 12 ~ "type_4",
  ))


SO4@meta.data$subclass_MD <- factor(
  SO4@meta.data$subclass_MD, 
  levels = c("type_1", "type_2a", "type_2b","type_3","type_4")
)


SO4@meta.data <- SO4@meta.data %>%
  mutate(subclass2_MD = dplyr::case_when(
    seurat_clusters %in% c(0,1,3,4,10,2,11) ~ "type_1",
    seurat_clusters %in% c(5,13,6,9,14,7) ~ "type_2",
    seurat_clusters %in% c(8) ~ "type_3",
    seurat_clusters %in% c(12) ~ "type_4"
    
  ))

SO4@meta.data$subclass2_MD <- factor(
  SO4@meta.data$subclass2_MD, 
  levels = c("type_1", "type_2","type_3","type_4")
)

Idents(SO4) <- SO4@meta.data$subclass_MD

DimPlot(object = SO4, reduction = "umap", group.by = "subclass_MD", label = TRUE)

DimPlot(object = SO4, reduction = "umap", group.by = "subclass2_MD", label = TRUE)

DimPlot(SO4,group.by = "subclass2_MD",split.by = "treatment")

DimPlot(SO4,group.by = "seurat_clusters",split.by = "treatment")

8 Viewing genes that differentiate each Type

markers.to.plot3 <- c("Pappa2",
                      "Egf",
                      "Umod",
                      "Fos",
                      "Junb",
                      "Cxcl10",
                      "Isg15"
                      
)


DotPlot(SO4,
features = markers.to.plot3,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
group.by = "subclass2_MD",
col.max = 2.5)+
coord_flip()

DotPlot(SO4,
features = markers.to.plot2,
dot.scale = 8,
dot.min = 0,
scale = FALSE,
scale.max = 100,
scale.min = 0,
col.min = -2.5,
group.by = "subclass_MD",
col.max = 2.5)+
coord_flip()

Idents(SO4) <- "seurat_clusters"



SO4_DEGs <- FindAllMarkers(SO4, 
                          only.pos = FALSE,
                          logfc.threshold = 0.5,
                          min.pct = 0.2,
                          min.diff.pct = 0.05,
                          return.thresh = 0.05)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 10
## Calculating cluster 2
## Calculating cluster 11
## Calculating cluster 5
## Calculating cluster 13
## Calculating cluster 6
## Calculating cluster 9
## Calculating cluster 14
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 12
SO4_DEGs %>%
    group_by(cluster) %>%
    dplyr::filter(avg_log2FC > 1, p_val_adj < 0.05) %>%
    slice_head(n = 3) %>%
    ungroup() -> top3

DoHeatmap(SO4, features = top3$gene) + NoLegend()

top3
DimPlot(SO4,group.by = "seurat_clusters",split.by = "treatment")

9 Saving Seurat Object

save(SO4, file = here("jk_code", "JK_cleanMD_NEW.rds"))